
// Session setup: turn off output paging, start from an empty dataset, and make
// every relative path used below resolve against the replication root.
set more off
clear

cd "D:\data_replication"


// Import Intermediate by HS6
//------------------------------------------------------------------------------
// Reads the CSV list of intermediate product codes, keeps only its productcode
// column (renamed to HS6), and saves it as the lookup file that the yearly
// sections use on the using side of their m:1 merge on HS6.

// The clear option lets this section be rerun on its own with data in memory.
import delimited data\intermediates\intermediates_HS6.csv, clear

keep productcode
rename productcode HS6

// An m:1 merge aborts when the key is repeated in the using file, so make HS6
// unique before saving. This changes nothing when the CSV has no repeated code.
duplicates drop

save data\intermediates\intermediates_HS6.dta, replace


// 2003-2007 - Concord Intermediate to pc8plus
//------------------------------------------------------------------------------
// One pass per year; every year goes through the same steps:
//   1. flag each row of that year's cn8-to-pc8 crosswalk as intermediate when
//      the first six characters of cn8_string appear in intermediates_HS6.dta;
//   2. average the flag within pc8_string (share of linked cn8 rows flagged);
//   3. attach pc8plus from the automatic crosswalk, then let the two manually
//      matched files overwrite it wherever they have an entry;
//   4. average the pc8 shares within pc8plus and save intermediate_YYYY.dta.
// Paths use forward slashes: Stata accepts them on every platform, and a
// backslash placed directly before a macro reference would be read as an escape.

forvalues yr = 2003/2007 {

    use "data/crosswalks/cn8_to_pc8/crosswalk_cn8_pc8_`yr'", clear
    gen HS6 = substr(cn8_string, 1, 6)
    destring HS6, replace
    merge m:1 HS6 using "data/intermediates/intermediates_HS6.dta"
    drop if _merge == 2                         // listed codes with no cn8 row this year
    gen intermediate = (_merge == 3)            // 1 when the code is on the list, else 0

    // Reduce to one row per pc8 code carrying the share of flagged cn8 rows.
    bysort pc8_string: egen intermediate_pc8 = mean(intermediate)
    by pc8_string: keep if _n == 1

    keep pc8_string year intermediate_pc8
    rename pc8_string prccode

    // Merge pc8 to pc8plus
    merge m:1 prccode year using "data/crosswalks/pc8_over_time/crosswalk_pc8_pc8plus_2003_2007_temp.dta"
    gen match = 3
    replace match = 1 if _merge == 1            // no row in the automatic crosswalk
    drop if _merge == 2                         // products not in dataset
    drop _merge
    // Park the automatic pc8plus under another name so that the manual files,
    // which bring their own pc8plus, can be merged in without a name clash.
    rename pc8plus pc8plus_temp

    // Merge pc8 to manually matched pc8plus
    merge m:1 prccode year using "data/crosswalks/pc8_over_time/manually_matched_pc8_over_time.dta"
    drop if _merge == 2
    // Diagnostic only: codes covered by neither the automatic crosswalk nor the
    // first manual file. Captured so an empty or oversized table cannot stop
    // the run; previously this listing was produced for 2003 alone.
    capture noisily tabulate prccode if _merge == 1 & match == 1
    replace pc8plus_temp = pc8plus if _merge == 3
    drop _merge pc8plus

    // Merge pc8 to the second group of manually matched pc8plus
    merge m:1 prccode year using "data/crosswalks/pc8_over_time/manually_matched_pc8_over_time_2.dta"
    replace pc8plus_temp = pc8plus if _merge == 3
    drop if _merge == 2
    // exit and synthetic are not created in this script; presumably they arrive
    // with the merged crosswalk files - confirm against those files.
    drop exit synthetic match _merge pc8plus
    rename pc8plus_temp pc8plus

    // Reduce to one row per pc8plus carrying the mean of its pc8-level shares.
    bysort pc8plus: egen intermediate_pc8plus = mean(intermediate_pc8)
    by pc8plus: keep if _n == 1

    keep pc8plus intermediate_pc8plus
    gen year = `yr'
    save "data/intermediates/intermediate_`yr'.dta", replace
}


// Merge Intermediate over all Years
//------------------------------------------------------------------------------
// Stack the five yearly files and reduce them to one row per pc8plus holding
// the unweighted mean of the yearly intermediate_pc8plus values. Rows with an
// empty pc8plus are discarded first.

use data\intermediates\intermediate_2003.dta, clear
append using data\intermediates\intermediate_2004.dta ///
             data\intermediates\intermediate_2005.dta ///
             data\intermediates\intermediate_2006.dta ///
             data\intermediates\intermediate_2007.dta
keep if pc8plus != ""

// Free the final variable name for the pooled mean by renaming the yearly value.
rename intermediate_pc8plus yearly_share
bysort pc8plus: egen intermediate_pc8plus = mean(yearly_share)
by pc8plus: keep if _n == 1

keep pc8plus intermediate_pc8plus
save data\intermediates\intermediate_temp.dta, replace


// Match Intermediate to Product IDs
//------------------------------------------------------------------------------
// Attach the pooled pc8plus share to data_base.dta, reduce to one row per
// product_id, and export a 0/1 flag that is 1 only when the share is exactly 1.

use estimation\1_data_format\data_base.dta, clear
merge m:1 pc8plus using data\intermediates\intermediate_temp.dta
drop if _merge == 2                             // pc8plus codes absent from data_base
keep product_id pc8plus intermediate_pc8plus

// A plain sort orders ties randomly, so the row kept for a product_id that has
// several pc8plus values could differ between runs. Ordering by pc8plus within
// product_id pins the surviving row; rows sharing a pc8plus carry the same
// share because it was merged m:1 on pc8plus.
bysort product_id (pc8plus): keep if _n == 1

// Only if 100% certain that intermediate; a missing share (no match) gives 0.
gen intermediate = (intermediate_pc8plus == 1)
keep product_id pc8plus intermediate

export delimited using data\intermediates\intermediate_pc8plus.csv, replace


// Delete Temporary Files
//------------------------------------------------------------------------------
// Removes the pooled temp file and the five yearly files written above.
// erase is the deletion command documented for every Stata platform; rm is
// documented only as a synonym on Mac and Unix, while this script changes
// directory to a Windows drive path.

foreach suffix in temp 2003 2004 2005 2006 2007 {
    erase "data/intermediates/intermediate_`suffix'.dta"
}


